/*--------------------------------------------------------------------------
Calculate descriptive statistics of Latinobar�metro's sample
Last modification: 04/14/2021
Uses:	           perceptions_data.dta
Produces:          Table A1, A2
--------------------------------------------------------------------------*/

global raw  "C:\Users\USERNAME\"
global code "${root}\code"
global data "${root}\data"
global res  "${root}\results"

* Data available
use "${data}\perceptions_data.dta", replace
drop if ano > 2015

order ciudad region
collapse (count) agua_potable - telefono_fijo , by(ano)	

foreach var of varlist agua_potable - telefono_fijo {
	rename `var' yr`var'
}
	
reshape long yr, i(ano) j(variable) string
reshape wide yr, i(variable) j(ano)
compress
export excel using "${res}\Data analysis.xlsx", sheet("Variables Availability") sheetreplace firstrow(variables)	


* Descriptive statistics of the database
use "${data}\perceptions_data.dta", replace
drop if ano > 2015

gen     assets = 1 if cloaca == 1 & computadora  == 1 & lavarropas == 1 & telefono_fijo == 1 & auto == 1
replace assets = 0 if cloaca == 0 | computadora  == 0 | lavarropas == 0 | telefono_fijo == 0 | auto == 0

tempfile db
save `db', replace

loc vars "edad hombre casado catolico alfabeto secondary secondary_padre desocupa pea agua_potable cloaca auto computadora heladera propieta telefono_fijo celular lavarropas ideology assets"

foreach stat in mean sd count {
	use `vars' pondera using `db', replace
	order `vars'
	tempfile tmp_`stat'	
	
	if "`stat'" != "count" replace desocupa = . if pea == 0
	
	collapse (`stat') `vars' [w=pondera]

	foreach var of local vars {
		rename `var' `stat'`var'
	}

	gen i = 1
	reshape long `stat', i(i) j(variable) string
	drop i
	
	save `tmp_`stat'', replace
}

merge 1:1 variable using `tmp_mean', nogen
merge 1:1 variable using `tmp_sd', nogen

replace mean = mean * 100 if mean < 1 

replace Category = "Socio-demographic" if inlist(variable,"edad","hombre","casado","catolico","ideology")
replace Category = "Education and Labor market" if inlist(variable,"alfabeto","secondary","secondary_padre","desocupa","pea")
replace Category = "Access to services" if inlist(variable,"agua_potable","cloaca")
* replace Category = "Beliefs" if inlist(variable,"catolico","ideology")

replace variable = "Access to running water (%)" if variable == "agua_potable"
replace variable = "Literate (%)" if variable == "alfabeto"
replace variable = "Car (%)" if variable == "auto"
replace variable = "Married or civil union (%)" if variable == "casado"
replace variable = "Catholic religion (%)" if variable == "catolico"
replace variable = "Mobile (%)" if variable == "celular"
replace variable = "Access to a sewage (%)" if variable == "cloaca"
replace variable = "Computer (%)" if variable == "computadora"
replace variable = "Unemployed (% Labor Force)" if variable == "desocupa"
replace variable = "Age" if variable == "edad"
replace variable = "Fridge (%)" if variable == "heladera"
replace variable = "Male (%)" if variable == "hombre"
replace variable = "Washing machine (%)" if variable == "lavarropas"
replace variable = "Economically active (%)" if variable == "pea"
replace variable = "Homeowner (%)" if variable == "propieta"
replace variable = "Secondary education or more (%)" if variable == "secondary"
replace variable = "Parents with secondary education (%)" if variable == "secondary_padre"
replace variable = "Landline (%)" if variable == "telefono_fijo"
replace variable = "Assets index" if variable == "assets"
replace variable = "Ideology (10 = right-wing)" if variable == "ideology"


rename (variable mean sd count) (Variable Mean Std_Dev Obs)

order Category
sort Category Variable 
export excel using "${res}\Data analysis.xlsx", sheet("Statistics of the sample") sheetreplace firstrow(variables)	



* Unfairness by groups
use "${data}\perceptions_data", replace
tempfile tab_file 
tempname tab
postfile `tab' str70(Year Category Level Class Percent) using `tab_file', replace

loc categories "all maxedu gedad hombre casado catolico relab id_pais "

foreach year in 1997 2001 2002 2007 2009 2010 2011 2013 2015 all {
	if ("`year'" == "all") local ifyear ""
	else local ifyear `"& ano == `year'"'
	
	forvalues class = 1(1)4 {
		foreach category of local categories {
		levels `category', local(options)
			foreach option of local options { 
				
				loc option_lab: label `category' `option'
				loc class_lab: label distribucion_justa `class'
				if "`category'" == "all" loc category_lab "All"
				if "`category'" == "maxedu" loc category_lab "Education level"
				if "`category'" == "gedad" loc category_lab "Age group"
				if "`category'" == "hombre" loc category_lab "Gender"
				if "`category'" == "casado" loc category_lab "Civil Status"
				if "`category'" == "catolico" loc category_lab "Religion"
				if "`category'" == "relab" loc category_lab "Type of employment"
				if "`category'" == "derecha" loc category_lab "Ideology"
				if "`category'" == "id_pais" loc category_lab "Country"
				if "`category'" == "size" loc category_lab "City size"
				
				sum all if `category' == `option' `ifyear' [w=pondera]
				loc tot = r(sum_w)
				
				sum all if `category' == `option' & distribucion_justa == `class'  `ifyear' [w=pondera]
				loc y = r(sum_w)/`tot'*100
						
				post `tab' ("`year'") ("`category_lab'") ("`option_lab'") ("`class_lab'") ("`y'")
			}
		}
	}
}


postclose `tab'
use `tab_file', clear
destring, replace

replace Level = "Argentina" if Level == "arg"
replace Level = "Bolivia" if Level == "bol"
replace Level = "Brazil" if Level == "bra"
replace Level = "Chile" if Level == "chl"
replace Level = "Colombia" if Level == "col"
replace Level = "Costa Rica" if Level == "cri"
replace Level = "Dominican Rep." if Level == "dom"
replace Level = "Ecuador" if Level == "ecu"
replace Level = "Guatemala" if Level == "gtm"
replace Level = "Honduras" if Level == "hnd"
replace Level = "Mexico" if Level == "mex"
replace Level = "Nicaragua" if Level == "nic"
replace Level = "Panama" if Level == "pan"
replace Level = "Peru" if Level == "per"
replace Level = "Paraguay" if Level == "pry"
replace Level = "El Salvador" if Level == "slv"
replace Level = "Uruguay" if Level == "ury"
replace Level = "Venezuela" if Level == "ven"

export excel using "${res}\Data analysis.xlsx", sheet("Fairness by group") sheetreplace firstrow(variables)	

compress
rename _all, lower
drop if year == "all"
destring _all, replace
save "${data}\fairness_groups.dta", replace


/* End of do-file */
exit
><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><><>
